#libraries
suppressPackageStartupMessages({
library(here)
library(ggplot2)
library(purrr)
library(tidyverse)
library(UpSetR)
})
#global objects used later in functions
#DE files
files <- list.files(here("data/analysis/DE"), pattern = "genes\\.csv$", full.names = TRUE)
#cell wall genes
cw <- read_tsv(here("data/analysis/cell-wall/goi-vst-expression_with-geneID.tsv"))
#sets (comparisons)
sets <- c("120h-vs-72h", "72h-vs-24h", "24h-vs-12h", "12h-vs-4h", "4h-vs-1h", "1h-vs-std")Blow are the functions used for data preparations and plotting.
padj < 0.001 and
abs(log2FoldChange) >=1 as the significance level.#functions
# function to plot the volcano plot
fun.volcano <- function(comparison, padj=0.001){
file <- grep(pattern = comparison, files, value = TRUE)
data <- read.csv(file)
data$significance <- ifelse(data$padj < padj & abs(data$log2FoldChange) >= 1,
ifelse(data$log2FoldChange >= 1, "up", "down"),
"not significant")
x_down <- min(data$log2FoldChange) %>% round()
x_up <- max(data$log2FoldChange) %>% round()
y <- -log10(min(data$padj)) %>% round()
# Create a data frame for the text labels
text_data <- data.frame(
x = c(x_down, x_up, 0),
y = c(y, y, y),
label = c(paste(sum(data$significance == "down"), "\n genes"),
paste(sum(data$significance == "up"), "\n genes"),
paste("padj <", padj))
)
ggplot(data, aes(x = log2FoldChange, y = -log10(padj))) +
geom_point(aes(color = significance), alpha = 0.5) +
scale_color_manual(values = c("up" = "red", "down" = "blue", "not significant" = "grey")) +
geom_vline(xintercept = c(-1, 1), linetype = "dashed", color = "grey") +
geom_hline(yintercept = -log10(0.001), linetype = "dashed", color = "grey") +
geom_text(data = text_data, aes(x = x, y = y, label = label)) +
xlab("Log2 Fold Change") +
ylab("-Log10 Adjusted P-value")
}
#function to calculate the frequency matrix for upset plot
fun.mat <- function(condition){
#list of dataframes
df_list <- lapply(files, function(file) {
data <- read.csv(file)
if (!is.null(condition)) {
rows <- eval(parse(text = condition), envir = data)
gene_names <- data[rows, 1]
} else {
gene_names <- data[,1]
}
df <- data.frame(ID = gene_names, comparison = 1)
colnames(df) <- c("ID", gsub(".*Time-(.*)genes\\.csv", "\\1", file))
return(df)
})
names(df_list) <- gsub(".*Time-(.*)genes\\.csv", "\\1", files)
mat <- reduce(df_list, full_join, by = "ID")
mat[is.na(mat)] <- 0
return(mat)
}
#upset plot function
fun.upset <- function(mat = mat, col="black", meta = FALSE) {
if(!isTRUE(meta)){
metadata <- NULL
} else {
cw_genes <- sapply(sets , function(x) {
file <- grep(pattern = x, files, value = TRUE)
genes <- read.csv(file) %>% .$X
cw_genes <- intersect(cw$ID, intersect(genes, mat$ID))
length(cw_genes)
})
metadata.cw <- data.frame(sets = sets, cw_genes = cw_genes)
metadata <- list(data = metadata.cw, plots = list(list(type = "hist", column = "cw_genes", assign = 20, fontsize = 10)))
}
upset(mat,
order.by = c("freq","degree"),
sets = sets[-1],
keep.order = TRUE,
mainbar.y.label = "Number of DE genes (exclusive)",
sets.x.label = "Number of DE genes",
point.size = 7,
shade.color = "grey50",
main.bar.color = col,
nintersects = 20,
text.scale = c(1.8,1.4,1.5,1.4,1.5,1.3),
set.metadata = metadata)
}